1   package org.apache.lucene.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.Map;
24  import java.util.Random;
25  
26  import org.apache.lucene.analysis.MockAnalyzer;
27  import org.apache.lucene.search.DocIdSetIterator;
28  import org.apache.lucene.store.Directory;
29  import org.apache.lucene.store.MockDirectoryWrapper;
30  import org.apache.lucene.store.RAMDirectory;
31  import org.apache.lucene.util.ArrayUtil;
32  import org.apache.lucene.util.Bits;
33  import org.apache.lucene.util.BytesRef;
34  import org.apache.lucene.util.LuceneTestCase;
35  import org.apache.lucene.util.TestUtil;
36  
37  public class TestPerSegmentDeletes extends LuceneTestCase {
  /**
   * Exercises per-segment delete bookkeeping: buffered deletes survive a flush
   * that does not apply them, are resolved when an NRT reader is opened, and
   * are applied to documents during a merge of the affected segments.
   */
  public void testDeletes1() throws Exception {
    //IndexWriter.debug2 = System.out;
    Directory dir = new MockDirectoryWrapper(new Random(random().nextLong()), new RAMDirectory());
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    // Large buffers so flushes and merges happen only when this test asks for
    // them explicitly, never implicitly.
    iwc.setMaxBufferedDocs(5000);
    iwc.setRAMBufferSizeMB(100);
    // doMerge starts false, so the policy proposes no merges until toggled below.
    RangeMergePolicy fsmp = new RangeMergePolicy(false);
    iwc.setMergePolicy(fsmp);
    IndexWriter writer = new IndexWriter(dir, iwc);
    // First batch of 5 docs (id value "1" per the deletes below) -> segment 0.
    for (int x = 0; x < 5; x++) {
      writer.addDocument(DocHelper.createDocument(x, "1", 2));
      //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit1");
    writer.commit();
    assertEquals(1, writer.segmentInfos.size());
    // Second batch of 5 docs -> segment 1.
    for (int x = 5; x < 10; x++) {
      writer.addDocument(DocHelper.createDocument(x, "2", 2));
      //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    //System.out.println("commit2");
    writer.commit();
    assertEquals(2, writer.segmentInfos.size());

    // Third batch stays in the RAM buffer (no commit/flush yet).
    for (int x = 10; x < 15; x++) {
      writer.addDocument(DocHelper.createDocument(x, "3", 2));
      //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }

    writer.deleteDocuments(new Term("id", "1"));

    writer.deleteDocuments(new Term("id", "11"));

    // flushing without applying deletes means
    // there will still be deletes in the segment infos
    writer.flush(false, false);
    assertTrue(writer.bufferedUpdatesStream.any());

    // get reader flushes pending deletes
    // so there should not be anymore
    IndexReader r1 = writer.getReader();
    assertFalse(writer.bufferedUpdatesStream.any());
    r1.close();

    // delete id:2 from the first segment
    // merge segments 0 and 1
    // which should apply the delete id:2
    writer.deleteDocuments(new Term("id", "2"));
    writer.flush(false, false);
    // Arm the test merge policy for a one-shot merge of segments [0, 2).
    fsmp = (RangeMergePolicy) writer.getConfig().getMergePolicy();
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    writer.maybeMerge();

    // Two originals merged into one, plus the segment flushed above.
    assertEquals(2, writer.segmentInfos.size());

    // id:2 shouldn't exist anymore because
    // it's been applied in the merge and now it's gone
    IndexReader r2 = writer.getReader();
    int[] id2docs = toDocsArray(new Term("id", "2"), null, r2);
    assertTrue(id2docs == null);
    r2.close();

    /**
    // added docs are in the ram buffer
    for (int x = 15; x < 20; x++) {
      writer.addDocument(TestIndexWriterReader.createDocument(x, "4", 2));
      System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    assertTrue(writer.numRamDocs() > 0);
    // delete from the ram buffer
    writer.deleteDocuments(new Term("id", Integer.toString(13)));

    Term id3 = new Term("id", Integer.toString(3));

    // delete from the 1st segment
    writer.deleteDocuments(id3);

    assertTrue(writer.numRamDocs() > 0);

    //System.out
    //    .println("segdels1:" + writer.docWriter.deletesToString());

    //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

    // we cause a merge to happen
    fsmp.doMerge = true;
    fsmp.start = 0;
    fsmp.length = 2;
    System.out.println("maybeMerge "+writer.segmentInfos);

    SegmentInfo info0 = writer.segmentInfos.info(0);
    SegmentInfo info1 = writer.segmentInfos.info(1);

    writer.maybeMerge();
    System.out.println("maybeMerge after "+writer.segmentInfos);
    // there should be docs in RAM
    assertTrue(writer.numRamDocs() > 0);

    // assert we've merged the 1 and 2 segments
    // and still have a segment leftover == 2
    assertEquals(2, writer.segmentInfos.size());
    assertFalse(segThere(info0, writer.segmentInfos));
    assertFalse(segThere(info1, writer.segmentInfos));

    //System.out.println("segdels2:" + writer.docWriter.deletesToString());

    //assertTrue(writer.docWriter.segmentDeletes.size() > 0);

    IndexReader r = writer.getReader();
    IndexReader r1 = r.getSequentialSubReaders()[0];
    printDelDocs(r1.getLiveDocs());
    int[] docs = toDocsArray(id3, null, r);
    System.out.println("id3 docs:"+Arrays.toString(docs));
    // there shouldn't be any docs for id:3
    assertTrue(docs == null);
    r.close();

    part2(writer, fsmp);
    **/
    // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
    //System.out.println("close");
    writer.close();
    dir.close();
  }
165 
166   /**
167   static boolean hasPendingDeletes(SegmentInfos infos) {
168     for (SegmentInfo info : infos) {
169       if (info.deletes.any()) {
170         return true;
171       }
172     }
173     return false;
174   }
175   **/
  /**
   * Continuation of {@link #testDeletes1}: flushes two more 5-doc segments,
   * buffers a delete, and triggers a one-shot merge of segments [1, 3) so the
   * delete is applied during the merge. NOTE(review): currently only invoked
   * from the commented-out tail of testDeletes1 above.
   */
  void part2(IndexWriter writer, RangeMergePolicy fsmp) throws Exception {
    // Flush 5 docs into a new segment without applying deletes.
    for (int x = 20; x < 25; x++) {
      writer.addDocument(DocHelper.createDocument(x, "5", 2));
      //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    writer.flush(false, false);
    // And another 5-doc segment.
    for (int x = 25; x < 30; x++) {
      writer.addDocument(DocHelper.createDocument(x, "5", 2));
      //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
    }
    writer.flush(false, false);

    //System.out.println("infos3:"+writer.segmentInfos);

    // Buffer a delete; it should be resolved by the merge triggered below.
    Term delterm = new Term("id", "8");
    writer.deleteDocuments(delterm);
    //System.out.println("segdels3:" + writer.docWriter.deletesToString());

    // Arm the test merge policy for a one-shot merge of segments [1, 3).
    fsmp.doMerge = true;
    fsmp.start = 1;
    fsmp.length = 2;
    writer.maybeMerge();

    // deletes for info1, the newly created segment from the
    // merge should have no deletes because they were applied in
    // the merge
    //SegmentInfo info1 = writer.segmentInfos.info(1);
    //assertFalse(exists(info1, writer.docWriter.segmentDeletes));

    //System.out.println("infos4:"+writer.segmentInfos);
    //System.out.println("segdels4:" + writer.docWriter.deletesToString());
  }
208 
209   boolean segThere(SegmentCommitInfo info, SegmentInfos infos) {
210     for (SegmentCommitInfo si : infos) {
211       if (si.info.name.equals(info.info.name)) return true;
212     }
213     return false;
214   }
215 
216   public static void printDelDocs(Bits bits) {
217     if (bits == null) return;
218     for (int x = 0; x < bits.length(); x++) {
219       System.out.println(x + ":" + bits.get(x));
220     }
221   }
222 
223   public int[] toDocsArray(Term term, Bits bits, IndexReader reader)
224       throws IOException {
225     Fields fields = MultiFields.getFields(reader);
226     Terms cterms = fields.terms(term.field);
227     TermsEnum ctermsEnum = cterms.iterator();
228     if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
229       PostingsEnum postingsEnum = TestUtil.docs(random(), ctermsEnum, null, PostingsEnum.NONE);
230       return toArray(postingsEnum);
231     }
232     return null;
233   }
234 
235   public static int[] toArray(PostingsEnum postingsEnum) throws IOException {
236     List<Integer> docs = new ArrayList<>();
237     while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
238       int docID = postingsEnum.docID();
239       docs.add(docID);
240     }
241     return ArrayUtil.toIntArray(docs);
242   }
243 
244   public class RangeMergePolicy extends MergePolicy {
245     boolean doMerge = false;
246     int start;
247     int length;
248 
249     private final boolean useCompoundFile;
250 
251     private RangeMergePolicy(boolean useCompoundFile) {
252       this.useCompoundFile = useCompoundFile;
253     }
254 
255     @Override
256     public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
257         throws IOException {
258       MergeSpecification ms = new MergeSpecification();
259       if (doMerge) {
260         OneMerge om = new OneMerge(segmentInfos.asList().subList(start, start + length));
261         ms.add(om);
262         doMerge = false;
263         return ms;
264       }
265       return null;
266     }
267 
268     @Override
269     public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
270         int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
271         throws IOException {
272       return null;
273     }
274 
275     @Override
276     public MergeSpecification findForcedDeletesMerges(
277         SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
278       return null;
279     }
280 
281     @Override
282     public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, IndexWriter writer) {
283       return useCompoundFile;
284     }
285   }
286 }